/*==============================================================================
================================================================================

Table IX of PSS (2020): PVS and Implied Volatility Forecast Errors
 
================================================================================ 
==============================================================================*/	


clear
clear matrix
clear mata
set maxvar 10000
graph drop _all
graph close _all
est drop _all

/*------------------------------------------------------------------------------

 Read in data and conduct appropriate merges
 
------------------------------------------------------------------------------*/


/*---------------------------------------------
Read in mapping between OptionMetrics and CRSP
-----------------------------------------------*/
import delimited "Data/om_crsp_link.csv", clear
drop if score != 1 | mi(permno)

/* To avoid merge issues, focus only on cases where permno-secid is unique.
This is over 98% of cases
*/
duplicates drop permno, force
duplicates drop secid, force

keep permno secid
tempfile om
save `om',replace


/*---------------------------------------------
Read portfolio constituents
-----------------------------------------------*/
import delimited "Data/Portfolio_Constituents/TOTAL_VOL_BE_Star_Rolling_MKTCAP_Constituents.csv", clear
gen yq = qofd(date(dtdate , "YMD"))
format yq %tq
keep permno yq quint terc total_vol ind

merge m:1 permno using `om'
keep if _merge == 1 | _merge == 3
drop if yq < yq(1996,1)
drop _merge

*For permnos with missing secids, set to negative of row number for later merge
replace secid = -1 * _n if mi(secid)

tempfile om_crsp_all
save `om_crsp_all',replace


/*------------------------------------------------------------
Read in OptionMetrics Realized Historical Volatility Surface
--------------------------------------------------------------*/
local from_scratch = 0
if `from_scratch' {
use "Data/optionm_hvol_all.dta", clear


*Resample to end of month
gen ym = mofd(date)
collapse (last) date volatility, by(ym secid days)

*Save condensed file
compress
save "Data/optionsm_hvol_monthly.dta", replace
}

else {
use "Data/optionsm_hvol_monthly.dta", clear
}


/*------------------------------------------------------------
Process OptionMetrics Realized Historical Volatility Surface
--------------------------------------------------------------*/

*Rename volatility column
rename volatility rv

*Resample to end of quarter
gen yq = qofd(date)
format yq %tq
collapse (last) date rv, by(yq secid days)

*Drop stale observations
gen ddiff = dofq(yq + 1) - date
drop if ddiff > 21
drop ddiff

*Reshape to save volatilities of all maturities
keep yq secid rv days
reshape wide rv, i(secid yq) j(days)

tempfile om_hvol
save `om_hvol',replace


/*------------------------------------------------------------
Read in OptionMetrics Standardized Volatility Surface
--------------------------------------------------------------*/
local from_scratch = 0
if `from_scratch' {
use "Data/optionm_svol_all.dta", clear

*Take average of put and call IV for each date-secid-days tuple
collapse (mean) impl_volatility, by(date secid days)

*Resample to end of month
gen ym = mofd(date)
collapse (last) date impl_volatility, by(ym secid days)

*Save condensed file
compress
save "Data/optionsm_svol_monthly.dta", replace
}

else {
use "Data/optionsm_svol_monthly.dta", clear
}


/*------------------------------------------------------------
Process OptionMetrics Realized Historical Volatility Surface
--------------------------------------------------------------*/

*Rename volatility column
rename impl_volatility iv

*Resample to end of quarter
gen yq = qofd(date)
format yq %tq
collapse (last) date iv, by(yq secid days)

*Drop stale observations
gen ddiff = dofq(yq + 1) - date
drop if ddiff > 21
drop ddiff

*Reshape to save volatilities of all maturities
keep yq secid iv days
reshape wide iv, i(secid yq) j(days)

*Merge with historical volatilites
merge 1:1 yq secid using `om_hvol'
keep if _merge == 3
drop _merge

/*------------------------------------------------------------
Merge implied volatilites with portfolio information
--------------------------------------------------------------*/
merge 1:1 secid yq using  `om_crsp_all'
keep if _merge == 2 | _merge == 3
drop _merge

/*------------------------------------------------------------
Merge with PVS
--------------------------------------------------------------*/
merge m:1 yq using pss_data_replication.dta, ///
keepusing(yq tvol_BM_Star_ew_spr)
keep if _merge == 3
drop _merge 


*Standardize pvs using its 1970q2-2016q2 moments
replace tvol_BM_Star_ew_spr = (tvol_BM_Star_ew_spr -  -.1796) / .3678
rename tvol_BM_Star_ew_spr pvs


/*------------------------------------------------------------------------------

Run regressions
 
------------------------------------------------------------------------------*/

*Regression setup
gen byte constant = 1
xtset secid yq
egen ind_yq = group(ind yq)

/* Notation: IV(t, h, k) is time-t implied volatility for returns 
   between h and k.  RV(h,k) is realized volatility between h and k */

*Generate IV(t, t+3Q, t+4Q)
gen iv_3q_4q = sqrt(4 * (iv365^2 - iv273^2*(3/4)))

/*------------------------------------------
Full-year horizons
--------------------------------------------*/

local i 1

*Generate forecast error
gen forecast_error_365 = (f4.rv365 - iv365)*100

*ForecastError(t, t+4Q) = PVS x Quintile interaction
xi: reghdfe forecast_error_365 i.quint*pvs, absorb(constant) cluster(secid yq)
est store m`i++'

*ForecastError(t, t+4Q) = PVS x Quintile interaction, ind x yq FE
xi: reghdfe forecast_error_365 i.quint*pvs, absorb(ind_yq) cluster(secid yq)
est store m`i++'

/*------------------------------------------
3Q-4Q from now
--------------------------------------------*/

*Generate forecast error
gen forecast_error_3q_4q = (f4.rv91 - iv_3q_4q)*100

*ForecastError(t, t+4Q) = PVS x Quintile interaction
xi: reghdfe forecast_error_3q_4q i.quint*pvs, absorb(constant) cluster(secid yq)
est store m`i++'

*ForecastError(t, t+4Q) = PVS x Quintile interaction, ind x yq FE
xi: reghdfe forecast_error_3q_4q i.quint*pvs, absorb(ind_yq) cluster(secid yq)
est store m`i++'

/*------------------------------------------
Output
--------------------------------------------*/

est table *, se stats(r2_a N) 
